#################################################
### Replication Code for Study 2              ###
### Title: Mimicking Democracy                ###
### Authors: Hsu Yumin Wang, Eddy S. F. Yeung ###
### Version: April 26, 2024                   ###
#################################################

### Set-up ----
## Clear the work environment and set the working directory
rm(list = ls())
setwd("~/Desktop/redist-propaganda/replication/Study 2")

## Load the required packages
library(tidyverse)   # version 2.0.0
library(estimatr)    # version 1.0.0
library(parameters)  # version 0.15.0
library(texreg)      # version 1.37.5
library(haven)       # version 2.4.3
library(car)         # version 3.0-12
library(TOSTER)      # version 0.4.1
library(interflex)   # version 1.2.6
library(mice)        # version 3.14.0
library(psych)       # version 2.1.9
library(pscl)        # version 1.5.5
library(gridExtra)   # version 2.3
library(lemon)       # version 0.4.7
library(cowplot)     # version 1.1.1

## Read the dataset
df <- read.csv("main_data.csv")

### Code individual variables and impute the covariates ----
## Code individual variables
# Age (18-29 = 1; 60+ = 5)
table(df$age)

# Female (= 1)
df$female <- ifelse(df$gender == 2, 1, 0)
table(df$female)

# Education (1 = primary school or below; postgraduate degree = 6)
df$educ <- ifelse(df$educ == 6 | df$educ == 7, 6, df$educ)
table(df$educ)

# Income (1 = below 500 RMB; 10 = above 20,000 RMB)
table(df$income)

# CCP membership
df$ccp <- ifelse(df$party == 3, 1, 0)
table(df$ccp)

# policy trust
table(df$policy_trust)

# Screeners (estimated based on 2-parameter IRT models)
df$screener1_correct <- ifelse(df$screener1 == "3,5", 1, 0)
df$screener2_correct <- ifelse(df$screener2 == "5,11", 1, 0)
df$screener3_correct <- ifelse(df$TIPI_X == 3, 1, 0)
df$screener4_correct <- ifelse(df$TIPI_Y == 1, 1, 0)
df_IRT <- dplyr::select(df,
                        screener1_correct, screener2_correct,
                        screener3_correct, screener4_correct)
df_IRT <- rollcall(df_IRT,
                   yea = 1, nay = 0, missing = c(NA), notInLegis = 9,
                   legis.names = NULL, vote.names = NULL,
                   legis.data = NULL, vote.data = NULL,
                   desc = NULL, source = NULL)
set.seed(1)
model_screeners <- ideal(df_IRT,
                         store.item = T, normalize = T,
                         maxiter = 10000, thin = 10)
df$attentiveness <- model_screeners$xbar[ , 1]

# Table S4
df_temp <- subset(df, group == 0 | group == 1 | group == 2)
summary(df_temp$screener1_correct)
sd(df_temp$screener1_correct)
summary(df_temp$screener2_correct)
sd(df_temp$screener2_correct)
summary(df_temp$screener3_correct)
sd(df_temp$screener3_correct, na.rm = TRUE)
summary(df_temp$screener4_correct)
sd(df_temp$screener4_correct, na.rm = TRUE)

# Political knowledge (estimated based on 2-parameter IRT models)
df$know1_correct <- ifelse(df$know1 == 3, 1, 0)
df$know2_correct <- ifelse(df$know2 == 4, 1, 0)
df$know3_correct <- ifelse(df$know3 == 2, 1, 0)
df$know4_correct <- ifelse(df$know4 == 1, 1, 0)
df_know_IRT <- dplyr::select(df,
                             know1_correct, know2_correct,
                             know3_correct, know4_correct)
df_know_IRT <- rollcall(df_know_IRT,
                        yea = 1, nay = 0, missing = c(NA), notInLegis = 9,
                        legis.names = NULL, vote.names = NULL,
                        legis.data = NULL, vote.data = NULL,
                        desc = NULL, source = NULL)
set.seed(1)
model_knowledge <- ideal(df_know_IRT,
                         store.item = T, normalize = T,
                         maxiter = 10000, thin = 10)
df$knowledge <- model_knowledge$xbar[ , 1]

# Personality: agreeableness
df$agreeableness <- ((6 - df$TIPI_2) + df$TIPI_7) / 2
table(df$agreeableness)

# Personality: conscientiousness
df$conscientiousness <- (df$TIPI_3 + (6 - df$TIPI_8)) / 2
table(df$conscientiousness)

# Personality: extraversion
df$extraversion <- (df$TIPI_1 + (6 - df$TIPI_6)) / 2
table(df$extraversion)

# Social-equity understanding of democracy (estimated based on 2-parameter IRT models)
df$define1_social <- ifelse(df$define1 == 1, 1, 0)
df$define2_social <- ifelse(df$define2 == 1, 1, 0)
df$define3_social <- ifelse(df$define3 == 1, 1, 0)
df$define4_social <- ifelse(df$define4 == 1, 1, 0)
df_define_IRT <- dplyr::select(df,
                               define1_social, define2_social,
                               define3_social, define4_social)
df_define_IRT <- rollcall(df_define_IRT,
                          yea = 1, nay = 0, missing = c(NA), notInLegis = 9,
                          legis.names = NULL, vote.names = NULL,
                          legis.data = NULL, vote.data = NULL,
                          desc = NULL, source = NULL)
set.seed(1)
model_understanding <- ideal(df_define_IRT,
                             store.item = T, normalize = T,
                             maxiter = 10000, thin = 10)
df$understanding <- model_understanding$xbar[ , 1]

# Table S6
table(df$define1_social)
table(df$define1_social) %>% prop.table()
table(df$define2_social)
table(df$define2_social) %>% prop.table()
table(df$define3_social)
table(df$define3_social) %>% prop.table()
table(df$define4_social)
table(df$define4_social) %>% prop.table()

# Prior influence by state messaging (1 = likely pretreated; 0 = unlikely)
df$pretreat_news <- ifelse(df$pretreat_news == 8, 0, df$pretreat_news)
df$pretreated <- ifelse(df$pretreat_news > median(df$pretreat_news, na.rm = T) &
                          df$pretreat_eval > median(df$pretreat_eval, na.rm = T),
                        1, 0)
table(df$pretreated)

# Covid satisfaction
table(df$covid)
table(df$covid) %>% prop.table()

# Treatment conditions
table(df$group)

# Democratic legitimacy (mean effects index with imputation)
cal_MEI <- 
  function(Z, outcome_mat, to_reorient, reorient = F, greedy = T, impute = F) {
    if(impute == T) {
      R <- 1 * is.na(outcome_mat)
      means_for_imputation <- 
        rbind(apply(outcome_mat[Z == 0, ], MAR = 2, FUN = mean, na.rm = T),
              apply(outcome_mat[Z == 1, ], MAR = 2, FUN = mean, na.rm = T),
              apply(outcome_mat[Z == 2, ], MAR = 2, FUN = mean, na.rm = T))
      to_impute <- R * means_for_imputation[Z + 1, ]
      outcome_mat[is.na(outcome_mat)] <- 0
      outcome_mat <- outcome_mat + to_impute
    }
    c_mean <- apply(X = outcome_mat[Z == 0, ], MARGIN = 2, FUN = mean, na.rm = T)
    c_sd <- apply(X = outcome_mat[Z == 0, ], MARGIN = 2, FUN = sd, na.rm = T)
    z_score <- t(t(sweep(outcome_mat, 2, c_mean)) / c_sd)
    index_numerator <- rowSums(z_score)
    if(greedy == T) {
      n_outcomes <- rowSums(!is.na(z_score))
    }
    else if(greedy == F){
      n_outcomes <- ncol(outcome_mat)
    }
    index <- index_numerator / n_outcomes
    index <- (index - mean(index[Z == 0], na.rm = T)) / sd(index[Z == 0], na.rm = T)
    return(index)
  }
vio_matrix <- cbind(df$demo1, df$demo2, 10 - df$demo3)
set.seed(1)
df <- df %>% 
  mutate(democraticness = cal_MEI(Z = df$group, outcome_mat = vio_matrix, 
                                  impute = T))

# Satisfaction with the government's performance in promoting social equity
# (-3 = very dissatisfied, 3 = very satisfied)
df <- df %>% 
  mutate(satisfaction = case_when(
    equity2a == 1 ~ 2,
    equity2a == 2 ~ 3,
    equity2b == 1 ~ -2,
    equity2b == 2 ~ -3,
    equity2c == 1 ~ 1,
    equity2c == 2 ~ -1,
    equity2c == 3 ~ 0
  ))
table(df$satisfaction)

# Perceived extent to which the government cares about improving social equity
# (-3 = does not care a great deal; 3 = cares a great deal)
df <- df %>% 
  mutate(care = case_when(
    care2a == 1 ~ 2,
    care2a == 2 ~ 3,
    care2b == 1 ~ -2,
    care2b == 2 ~ -3,
    care2c == 1 ~ 1,
    care2c == 2 ~ -1,
    care2c == 3 ~ 0
  ))
table(df$care)

# Evaluation of rule of law development in China
# (-3 = very not well-developed; 3 = very well-developed)
df <- df %>% 
  mutate(rule_of_law = case_when(
    law2a == 1 ~ 2,
    law2a == 2 ~ 3,
    law2b == 1 ~ -2,
    law2b == 2 ~ -3,
    law2c == 1 ~ 1,
    law2c == 2 ~ -1,
    law2c == 3 ~ 0
  ))
table(df$rule_of_law)

# Evaluation of foreign affairs management in China
# (-3 = very not well-managed; 3 = very well-managed)
df <- df %>% 
  mutate(foreign_aff = case_when(
    foreign2a == 1 ~ 2,
    foreign2a == 2 ~ 3,
    foreign2b == 1 ~ -2,
    foreign2b == 2 ~ -3,
    foreign2c == 1 ~ 1,
    foreign2c == 2 ~ -1,
    foreign2c == 3 ~ 0
  ))
table(df$foreign_aff)

# Evaluation of environmental protection in China
# (-3 = very dissatisfied, 3 = very satisfied)
df <- df %>% 
  mutate(envir_protect = case_when(
    envir2a == 1 ~ 2,
    envir2a == 2 ~ 3,
    envir2b == 1 ~ -2,
    envir2b == 2 ~ -3,
    envir2c == 1 ~ 1,
    envir2c == 2 ~ -1,
    envir2c == 3 ~ 0
  ))
table(df$envir_protect)

## Impute the covariates
df_cov <- df %>%
  dplyr::select(ResponseId, age, female, educ, income, ccp, knowledge,
                policy_trust, agreeableness, extraversion, conscientiousness,
                attentiveness)
df_cov$age <- as.factor(df_cov$age)
df_cov$educ <- as.factor(df_cov$educ)
init <- mice(df_cov, maxit = 0, seed = 1)
meth <- init$method
predM <- init$predictorMatrix
imputed_data <- mice(df_cov, method = meth, predictorMatrix = predM, seed = 1)
df_imp <- complete(imputed_data, 1)
df <- df %>% left_join(df_imp, by = c("ResponseId"))

### Distribution of outcome variables in the control group ----
## How democratically is China being governed today? (0 = not at all democratic; 10 = completely democratic)
p1 <- ggplot(subset(df, group == 0 & !is.na(demo1)), aes(x = as.factor(demo1))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("0" = "0\n(Not at All\nDemocratic)",
                              "10" = "10\n(Completely\nDemocratic)")) +
  theme_minimal() + 
  ggtitle("How Democratic China Is") +
  xlab("") +
  ylab("") +
  coord_cartesian(ylim = c(0, 0.25)) +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$demo1, na.rm = T)
sd(subset(df, group == 0)$demo1, na.rm = T)
p1 <- p1 + 
  annotate("label", x = 3, y = 0.2, size = 2,
           family = "Times", label = "Control Mean = 7.26\n(SD = 2.12)")

## In your opinion how much of a democracy is China? (1 = not a democracy; 4 = a full democracy)
p2 <- ggplot(subset(df, group == 0 & !is.na(demo2)), aes(x = as.factor(demo2))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("1" = "Not a\nDemocracy", 
                              "2" = "Democracy\nw/ Major\nProblems",
                              "3" = "Democracy\nw/ Minor\nProblems", 
                              "4" = "Full\nDemocracy")) +
  theme_minimal() + 
  ggtitle("How Much of a Democracy China Is") +
  xlab("") +
  ylab("") +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$demo2, na.rm = T)
sd(subset(df, group == 0)$demo2, na.rm = T)
p2 <- p2 +
  annotate("label", x = 1.5, y = 0.4, size = 2,
           family = "Times", label = "Control Mean = 2.76\n(SD = 0.85)")

## To what extent do you think China lacks democracy? (0 = does not lack democracy at all; 10 = severely lacks democracy)
p3 <- ggplot(subset(df, group == 0 & !is.na(demo3)), aes(x = as.factor(demo3))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("0" = "0\n(Smallest\nExtent)",
                              "10" = "10\n(Largest\nExtent)")) +
  theme_minimal() + 
  ggtitle("Extent to Which China Lacks Democracy") +
  xlab("") +
  ylab("") +
  coord_cartesian(ylim = c(0, 0.25)) +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$demo3, na.rm = T)
sd(subset(df, group == 0)$demo3, na.rm = T)
p3 <- p3 +
  annotate("label", x = 9.5, y = 0.2, size = 2,
           family = "Times", label = "Control Mean = 3.87\n(SD = 2.84)")

# Save the graph (Figure S10)
plot_grid(p1, p2, p3, labels = "AUTO", ncol = 3,
          label_fontfamily = "Times", label_size = 10)
ggsave("dist_DV1.pdf", width = 10, height = 3.5)

## Satisfaction with the government's performance in promoting social equity
## (-3 = very dissatisfied, 3 = very satisfied)
p4 <- ggplot(subset(df, group == 0 & !is.na(satisfaction)), aes(x = as.factor(satisfaction))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("-3" = "-3\n(Very\nDissatisfied)",
                              "0" = "0\n(Neutral)",
                              "3" = "3\n(Very\nSatisfied)")) +
  theme_minimal() + 
  ggtitle("Satisfaction with Social-Equity Performance") +
  xlab("") +
  ylab("") +
  coord_cartesian(ylim = c(0, 0.45)) +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$satisfaction, na.rm = T)
sd(subset(df, group == 0)$satisfaction, na.rm = T)
p4 <- p4 +
  annotate("label", x = 2, y = 0.35, size = 2,
           family = "Times", label = "Control Mean = 1.38\n(SD = 1.79)")

## Perceived extent to which the government cares about improving social equity
## (-3 = does not care a great deal; 3 = cares a great deal)
p5 <- ggplot(subset(df, group == 0 & !is.na(care)), aes(x = as.factor(care))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("-3" = "-3\n(Smallest\nExtent)",
                              "0" = "0\n(Completely\nNeutral)",
                              "3" = "3\n(Largest\nExtent)")) +
  theme_minimal() + 
  ggtitle("Extent to Which Government Cares about Improving Social Equity") +
  xlab("") +
  ylab("") +
  coord_cartesian(ylim = c(0, 0.45)) +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$care, na.rm = T)
sd(subset(df, group == 0)$care, na.rm = T)
p5 <- p5 +
  annotate("label", x = 2, y = 0.35, size = 2,
           family = "Times", label = "Control Mean = 1.69\n(SD = 1.71)")

# Save the graph (Figure S11)
plot_grid(p4, p5, labels = "AUTO", ncol = 2,
          label_fontfamily = "Times", label_size = 10)
ggsave("dist_DV2_DV3.pdf", width = 10, height = 3.5)

## Evaluation of rule of law development in China
## (-3 = very not well-developed; 3 = very well-developed)
p6 <- ggplot(subset(df, group == 0 & !is.na(rule_of_law)), aes(x = as.factor(rule_of_law))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("-3" = "-3\n(Very Not Well\nDeveloped)",
                              "0" = "0\n(Completely\nNeutral)",
                              "3" = "3\n(Very Well\nDeveloped)")) +
  theme_minimal() + 
  ggtitle("Evaluation of China's Rule of Law") +
  xlab("") +
  ylab("") +
  coord_cartesian(ylim = c(0, 0.45)) +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$rule_of_law, na.rm = T)
sd(subset(df, group == 0)$rule_of_law, na.rm = T)
p6 <- p6 +
  annotate("label", x = 2, y = 0.35, size = 2,
           family = "Times", label = "Control Mean = 1.51\n(SD = 1.73)")

## Evaluation of foreign affairs management in China
## (-3 = very not well-managed; 3 = very well-managed)
p7 <- ggplot(subset(df, group == 0 & !is.na(foreign_aff)), aes(x = as.factor(foreign_aff))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("-3" = "-3\n(Very Not Well\nManaged)",
                              "0" = "0\n(Completely\nNeutral)",
                              "3" = "3\n(Very Well\nManaged)")) +
  theme_minimal() + 
  ggtitle("Evaluation of China's Foreign Affairs") +
  xlab("") +
  ylab("") +
  coord_cartesian(ylim = c(0, 0.8)) +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$foreign_aff, na.rm = T)
sd(subset(df, group == 0)$foreign_aff, na.rm = T)
p7 <- p7 +
  annotate("label", x = 2, y = .35/.45*0.8, size = 2,
           family = "Times", label = "Control Mean = 1.41\n(SD = 1.29)")

## Evaluation of environmental protection in China
## (-3 = very dissatisfied, 3 = very satisfied)
p8 <- ggplot(subset(df, group == 0 & !is.na(envir_protect)), aes(x = as.factor(envir_protect))) + 
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), 
           color = "black", fill = "grey80", na.rm = T) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(labels = c("-3" = "-3\n(Very\nDissatisfied)",
                              "0" = "0\n(Completely\nNeutral)",
                              "3" = "3\n(Very\nSatisfied)")) +
  theme_minimal() + 
  ggtitle("Evaluation of China's Environmental Protection") +
  xlab("") +
  ylab("") +
  coord_cartesian(ylim = c(0, 0.45)) +
  theme(text = element_text(family = "Times", size = 8),
        axis.text = element_text(family = "Times", size = 8),
        plot.title = element_text(hjust = 0.5))
mean(subset(df, group == 0)$envir_protect, na.rm = T)
sd(subset(df, group == 0)$envir_protect, na.rm = T)
p8 <- p8 +
  annotate("label", x = 2, y = 0.35, size = 2,
           family = "Times", label = "Control Mean = 1.75\n(SD = 1.53)")

# Save the graph (Figure S12)
plot_grid(p6, p7, p8, labels = "AUTO", ncol = 3,
          label_fontfamily = "Times", label_size = 10)
ggsave("dist_DV4_DV5_DV6.pdf", width = 10, height = 3.5)

### Conduct the main analyses ----
## Test H1
reg_H1 <- 
  lm_lin(democraticness ~ as.factor(group),
         covariates = ~ age.y + female.y + educ.y + income.y + ccp.y + 
           knowledge.y + policy_trust.y + agreeableness.y + extraversion.y + 
           conscientiousness.y + attentiveness.y,
         data = df)
temp_H1 <- tidy(reg_H1)
temp_H1[2, ] # ATE and p-value

## Test H2
temp_H2 <- linearHypothesis(reg_H1, "as.factor(group)1 = as.factor(group)2")
temp_H2 # p-value
temp_H1[2, "estimate"] - temp_H1[3, "estimate"] # difference in ATEs

## Test H3
reg_H3 <- 
  lm_lin(satisfaction ~ as.factor(group),
         covariates = ~ age.y + female.y + educ.y + income.y + ccp.y + 
           knowledge.y + policy_trust.y + agreeableness.y + extraversion.y + 
           conscientiousness.y + attentiveness.y,
         data = df)
temp_H3 <- tidy(reg_H3)
temp_H3[2, ] # ATE and p-value

## Test H4
reg_H4 <- 
  lm_lin(care ~ as.factor(group),
         covariates = ~ age.y + female.y + educ.y + income.y + ccp.y + 
           knowledge.y + policy_trust.y + agreeableness.y + extraversion.y + 
           conscientiousness.y + attentiveness.y,
         data = df)
temp_H4 <- tidy(reg_H4)
temp_H4[2, ] # ATE and p-value

## Summarize the results for H1 to H4 (Figure 2)
ATE_summary <- rbind(temp_H1[2:3, ], temp_H3[2:3, ], temp_H4[2:3, ])
ATE_summary$term <- rep(c("Redistributive Treatment", "Electoral Treatment"), 
                        times = 3)
ATE_summary$term <- factor(ATE_summary$term, 
                           levels = c("Redistributive Treatment",
                                      "Electoral Treatment"))
ATE_summary$outcome <- factor(ATE_summary$outcome, 
                              levels = c("care", "satisfaction", "democraticness"),
                              labels = c("Care", "Performance", "Democratic Legitimacy"))
ggplot(data = ATE_summary, 
       aes(x = estimate, y = outcome, color = term, shape = term)) +
  geom_point(position = position_dodge(-.5), size = 2) +
  scale_color_manual(values = c("grey0", "grey50")) +
  scale_shape_manual(values = c(19, 17)) +
  geom_errorbar(width = 0, aes(xmin = conf.low, xmax = conf.high), 
                position = position_dodge(-.5)) +
  xlab("Average Treatment Effect") +
  ylab("") +
  coord_cartesian(xlim = c(-0.5, 0.5)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  theme_classic() +
  theme(text = element_text(family = "Times", size = 12),
        axis.text = element_text(color = "black", size = 11),
        legend.justification = c(1, 1), legend.position = c(1, 1),
        legend.key.size = unit(1.5, "line"),
        legend.key.height = unit(0.5, "cm"),
        legend.margin = margin(t = -0.1, l = 0.15, b = 0.15, r = 0.15, unit = "cm"),
        legend.title = element_blank())
ggsave("H1_H2_H3_H4.pdf", width = 8, height = 4)  

## Test H5 (Figure 3)
pdf(file = "H5.pdf", width = 7.5, height = 3, family = "Times")
par(mfrow = c(1,3))

# Rule of law
TOSTtwo(m1 = mean(df$rule_of_law[df$group == 1], na.rm = TRUE), 
        sd1 = sd(df$rule_of_law[df$group == 1], na.rm = TRUE), 
        n1 = sum(!is.na(df$rule_of_law[df$group == 1])),
        m2 = mean(df$rule_of_law[df$group == 0], na.rm = TRUE),
        sd2 = sd(df$rule_of_law[df$group == 0], na.rm = TRUE),
        n2 = sum(!is.na(df$rule_of_law[df$group == 0])),
        low_eqbound_d = -.22, high_eqbound_d = .22,
        alpha = .05, var.equal = F)

# Foreign affairs
TOSTtwo(m1 = mean(df$foreign_aff[df$group == 1], na.rm = TRUE), 
        sd1 = sd(df$foreign_aff[df$group == 1], na.rm = TRUE), 
        n1 = sum(!is.na(df$foreign_aff[df$group == 1])),
        m2 = mean(df$foreign_aff[df$group == 0], na.rm = TRUE),
        sd2 = sd(df$foreign_aff[df$group == 0], na.rm = TRUE),
        n2 = sum(!is.na(df$foreign_aff[df$group == 0])),
        low_eqbound_d = -.22, high_eqbound_d = .22,
        alpha = .05, var.equal = F)

# Environmental protection
TOSTtwo(m1 = mean(df$envir_protect[df$group == 1], na.rm = TRUE), 
        sd1 = sd(df$envir_protect[df$group == 1], na.rm = TRUE), 
        n1 = sum(!is.na(df$envir_protect[df$group == 1])),
        m2 = mean(df$envir_protect[df$group == 0], na.rm = TRUE),
        sd2 = sd(df$envir_protect[df$group == 0], na.rm = TRUE),
        n2 = sum(!is.na(df$envir_protect[df$group == 0])),
        low_eqbound_d = -.22, high_eqbound_d = .22,
        alpha = .05, var.equal = F)
dev.off()

### Estimate the heterogeneous treatment effects by preregistered moderators ----
df$treatment <- as.factor(df$group)
hte <- df %>% filter(group == 0 | group == 1)

## Treatment heterogeneity by democratic understanding
out <- interflex(Y = "democraticness", D = "treatment", X = "understanding",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_1 <- plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
              xlab = "Social-Equity Understanding of Democracy\n", 
              ylab = "Marginal Effect on Democraticness")

## Treatment heterogeneity by policy trust
out <- interflex(Y = "care", D = "treatment", X = "policy_trust.y",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_2 <- plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
              xlab = "Policy Trust\n", 
              ylab = "Marginal Effect on Democraticness")

## Treatment heterogeneity by agreeableness
out <- interflex(Y = "democraticness", D = "treatment", X = "agreeableness.y",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_3 <- plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
              xlab = "Agreeableness\n", 
              ylab = "Marginal Effect on Democraticness")

## Treatment heterogeneity by extraversion
out <- interflex(Y = "democraticness", D = "treatment", X = "extraversion.y",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_4 <-plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
             xlab = "Extraversion\n", 
             ylab = "Marginal Effect on Democraticness")

## Treatment heterogeneity by conscientiousness
out <- interflex(Y = "democraticness", D = "treatment", X = "conscientiousness.y",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_5 <- plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
              xlab = "Conscientiousness\n", 
              ylab = "Marginal Effect on Democraticness")

## Treatment heterogeneity by income
out <- interflex(Y = "democraticness", D = "treatment", X = "income.y",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_6 <- plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
              xlab = "Income\n", 
              ylab = "Marginal Effect on Democraticness")

## Treatment heterogeneity by education
hte$educ <- as.numeric(hte$educ.y)
out <- interflex(Y = "democraticness", D = "treatment", X = "educ",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_7 <- plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
              xlab = "Education\n", 
              ylab = "Marginal Effect on Democraticness")

## Treatment heterogeneity by respondent attentiveness
out <- interflex(Y = "democraticness", D = "treatment", X = "attentiveness.y",
                 Z = c("age.y", "female.y", "educ.y", "income.y", "ccp.y", 
                       "knowledge.y", "policy_trust.y", "agreeableness.y", 
                       "extraversion.y", "conscientiousness.y", "attentiveness.y"),
                 data = hte, na.rm = TRUE,
                 estimator = "binning", vcov.type = "robust")
HTE_8 <- plot(out, theme.bw = TRUE, cex.axis = 0.8, cex.lab = 0.8, bin.labs = TRUE,
              xlab = "Respondent Attentiveness\n", 
              ylab = "Marginal Effect on Democraticness")

# Further test the interaction effect by respondent attentiveness
lm_robust(democraticness ~ group * attentiveness.y, 
          data = df, subset = (group == 0 | group == 1))

# Save the graph (Figure S13)
HTE <- grid.arrange(HTE_1, HTE_2, HTE_3, HTE_4, HTE_5, HTE_6, HTE_7, HTE_8, ncol = 2)
ggsave("HTE.pdf", HTE, width = 10, height = 14) 

## Treatment heterogeneity by prior influence by state messaging (Figure S14)
reg_pretreat1 <- 
  lm_lin(democraticness ~ as.factor(group),
         covariates = ~ age.y + female.y + educ.y + income.y + ccp.y + 
           knowledge.y + policy_trust.y + agreeableness.y + extraversion.y + 
           conscientiousness.y + attentiveness.y,
         subset = pretreated == 1,
         data = hte)
temp_pretreat1 <- tidy(reg_pretreat1)
reg_pretreat0 <- 
  lm_lin(democraticness ~ as.factor(group),
         covariates = ~ age.y + female.y + educ.y + income.y + ccp.y + 
           knowledge.y + policy_trust.y + agreeableness.y + extraversion.y + 
           conscientiousness.y + attentiveness.y,
         subset = pretreated == 0,
         data = hte)
temp_pretreat0 <- tidy(reg_pretreat0)
pretreat_summary <- rbind(temp_pretreat1[2, ], temp_pretreat0[2, ])
pretreat_summary$term <- rep(c("Redistributive Treatment"), 
                             times = 2)
pretreat_summary$term <- factor(pretreat_summary$term, 
                                levels = c("Redistributive Treatment"))
pretreat_summary$pretreated <- c("Likely Pretreated",
                                 "Unlikely Pretreated")
pretreat_summary$pretreated <- factor(pretreat_summary$pretreated, 
                                      levels = c("Unlikely Pretreated",
                                                 "Likely Pretreated"))
ggplot(data = pretreat_summary, aes(x = pretreated, y = estimate)) +
  geom_point(size = 2, color = "black") +
  scale_color_manual(values = c("black")) +
  geom_errorbar(width = 0, aes(ymin = conf.low, ymax = conf.high)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  xlab("Prior Exposure to State Messaging") +
  ylab("Conditional Average Treatment Effect") +
  coord_cartesian(ylim = c(-0.25, 0.5)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  theme_bw() + 
  theme(text = element_text(family = "Times", size = 10),
        axis.text = element_text(family = "Times", size = 9),
        plot.title = element_text(hjust = 0.5))
ggsave("pretreatment.pdf", width = 6, height = 4)

### Sample characteristics ----
library(fastDummies) # version 1.7.3
library(plotrix)     # version 3.8-2

## Location (i.e., Hukou type)
# Create a series of dummy variables to facilitate calculating mean and std
hukou_matrix <- df %>% select(hukou) %>% dummy_cols()

# Make a dataframe
Hukou <- data.frame(Values = c(0.304, 
                               0.696, 
                               mean(hukou_matrix$hukou_2, na.rm = TRUE),
                               mean(hukou_matrix$hukou_1, na.rm = TRUE)),
                    sd = c(0,
                           0,
                           std.error(hukou_matrix$hukou_2, na.rm = TRUE),
                           std.error(hukou_matrix$hukou_1, na.rm = TRUE)
                           ),
                    Attributes = c("Rural", "Urban", "Rural", "Urban"),
                    dataset = c("China Internet", "China Internet",
                                "Our Sample", "Our Sample"))

# Visualize the results
descriptive_location <- ggplot(Hukou, aes(x = Attributes, y = Values, fill = dataset)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  scale_fill_grey() +
  geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
                position = position_dodge(.9)) +
  theme_minimal() +
  xlab("Location") +
  ylab("Proportion") +
  coord_cartesian(ylim = c(0, 1)) +
  theme(legend.position = "top",
        legend.title = element_blank(),
        text = element_text(family = "Times", size = 12),
        axis.text = element_text(family = "Times", size = 11))

## Female
female_matrix <- df %>% select(female.x) %>% dummy_cols()

# Make a dataframe
Gender <- data.frame(Values = c(0.51,
                                0.49, 
                                mean(female_matrix$female.x_0, na.rm = TRUE),
                                mean(female_matrix$female.x_1, na.rm = TRUE)),
                     sd = c(0,
                            0,
                            std.error(female_matrix$female.x_0, na.rm = TRUE),
                            std.error(female_matrix$female.x_1, na.rm = TRUE)
                            ),
                     Attributes = c("Male", "Female", "Male", "Female"),
                     dataset = c("China Internet", "China Internet",
                                 "Our Sample", "Our Sample"))

# Visualize the results
descriptive_gender <- ggplot(Gender, aes(x = Attributes, y = Values, fill = dataset)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
                position = position_dodge(.9)) +
  scale_fill_grey() +
  theme_minimal() +
  xlab("Gender") +
  ylab("") +
  coord_cartesian(ylim = c(0, 1)) +
  theme(legend.position = "top",
        legend.title = element_blank(),
        text = element_text(family = "Times", size = 12),
        axis.text = element_text(family = "Times", size = 11))

## Age
# First, recode the values for comparisons in alignment with China Internet census data
df$age.x[df$age.x == 3] <- 2
df$age.x[df$age.x == 4] <- 3
df$age.x[df$age.x == 5] <- 3

# Create a series of dummy variables to facilitate calculating mean and std.error
age_matrix <- df %>% select(age.x) %>% dummy_cols()

# Make a dataframe
Age <- data.frame(Values = c(0.382, 0.391, 0.228, 
                             mean(age_matrix$age.x_1, na.rm = TRUE),
                             mean(age_matrix$age.x_2, na.rm = TRUE), 
                             mean(age_matrix$age.x_3, na.rm = TRUE)),
                  sd = c(0,
                         0,
                         0,
                         std.error(age_matrix$age.x_1, na.rm = TRUE),
                         std.error(age_matrix$age.x_2, na.rm = TRUE),
                         std.error(age_matrix$age.x_3, na.rm = TRUE)),
                  Attributes = c("< 30", "30-49", "50+", "< 30", "30-49", "50+"),
                  dataset = c("China Internet", "China Internet", "China Internet",
                              "Our Sample", "Our Sample", "Our Sample"))

# Visualize the results
descriptive_age <- ggplot(Age, aes(x = Attributes, y = Values, fill = dataset)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
                position = position_dodge(.9)) +
  scale_fill_grey() +
  theme_minimal() +
  xlab("Age") +
  ylab("") +
  coord_cartesian(ylim = c(0, 1)) +
  theme(legend.position = "top",
        legend.title = element_blank(),
        text = element_text(family = "Times", size = 12),
        axis.text = element_text(family = "Times", size = 11))

## Education
# First, recode the values for comparisons in alignment China Internet census data
df$educ.x[df$educ.x == 6] <- 5

# Create a series of dummy variables to facilitate calculating mean and std.error
edu_matrix <- df %>% select(educ.x) %>% dummy_cols()

# Make a dataframe
Education <- data.frame(Values = c(0.192, 
                                   0.405, 
                                   0.215, 
                                   0.1, 
                                   0.088,
                                   mean(edu_matrix$educ.x_1, na.rm = TRUE), 
                                   mean(edu_matrix$educ.x_2, na.rm = TRUE), 
                                   mean(edu_matrix$educ.x_3, na.rm = TRUE), 
                                   mean(edu_matrix$educ.x_4, na.rm = TRUE), 
                                   mean(edu_matrix$educ.x_5, na.rm = TRUE)),
                        sd = c(0,
                               0,
                               0,
                               0,
                               0,
                               std.error(edu_matrix$educ.x_1, na.rm = TRUE),
                               std.error(edu_matrix$educ.x_2, na.rm = TRUE),
                               std.error(edu_matrix$educ.x_3, na.rm = TRUE),
                               std.error(edu_matrix$educ.x_4, na.rm = TRUE),
                               std.error(edu_matrix$educ.x_5, na.rm = TRUE)
                               ),
                        Attributes = c("Primary school or below", "Junior middle school", 
                                       "Senior middle school", "Three-year college", 
                                       "University or above", "Primary school or below", 
                                       "Junior middle school", "Senior middle school", 
                                       "Three-year college", "University or above"),
                        dataset = c("China Internet", "China Internet", "China Internet", 
                                    "China Internet", "China Internet", "Our Sample",
                                    "Our Sample", "Our Sample", "Our Sample", "Our Sample"))

# Visualize the results
Education$Attributes <- factor(Education$Attributes, 
                               levels = c("Primary school or below", "Junior middle school", 
                                          "Senior middle school", "Three-year college", 
                                          "University or above"))

descriptive_education <- ggplot(Education, aes(x = Attributes, y = Values, fill = dataset)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
                position = position_dodge(.9)) +
  scale_fill_grey() +
  theme_minimal() +
  xlab("Education Level") +
  ylab("Proportion") +
  coord_cartesian(ylim = c(0, 1)) +
  theme(legend.position = "top",
        legend.title = element_blank(),
        text = element_text(family = "Times", size = 12),
        axis.text = element_text(family = "Times", size = 11))

# Putting all together
nt <- theme(legend.position = "none")
descriptive_comparison <- 
  grid_arrange_shared_legend(descriptive_education, 
                             arrangeGrob(
                               descriptive_location + nt, 
                               descriptive_age + nt, 
                               descriptive_gender + nt,
                               ncol = 3), 
                             ncol = 1, nrow = 2)
ggsave("descriptive_comparison.pdf", descriptive_comparison, width = 10, height = 8) 

### Assessing differential attrition ----
## Focus only on respondents who gave consent and were in Control or Redistributionist Propaganda group
df_filter <- df %>% filter(consent == 1, group != 2)

## Ability of covariates to predict whether respondents finished the survey
reg1 <- lm_robust(Finished ~ age.x + female.x + educ.x + income.x + ccp.x +
            knowledge.x + policy_trust.x + agreeableness.x + extraversion.x +
            conscientiousness.x + attentiveness.x, 
          data = df_filter)
summary(reg1) # F(11, 1414) = 1.319, p = 0.207

## Ability of covariates to predict treatment status, conditional on finishing the survey
reg2 <- lm_robust(group ~ age.x + female.x + educ.x + income.x + ccp.x +
            knowledge.x + policy_trust.x + agreeableness.x + extraversion.x +
            conscientiousness.x + attentiveness.x,
          data = df_filter, subset = Finished == 1)
summary(reg2) # F(11, 1397) = 0.6123, p = 0.820

## Generate a regression table (Table S5)
texreg(list(reg1, reg2), 
       include.ci = FALSE, stars = numeric(0), digits = 3,
       custom.header = list("Finished the Survey (= 1)" = 1, "Assigned to Treatment (= 1)" = 2),
       custom.note = "Entries are OLS estimates with robust standard errors in parentheses.",
       fontsize = "small")

## Attrition timeline (Figure S9)
# devtools::install_github('lbassan/attritevis')
library(attritevis) # version 0.1.0
library(waffle)     # version 1.0.2
library(ggpubr)     # version 0.4.0

df_filter <- df %>% filter(consent == 1 & group != 2)
df_filter$treatment <- df_filter$group
df_filter$treatment[df_filter$treatment == 0] <- "Control"
df_filter$treatment[df_filter$treatment == 1] <- "Treatment"

attrition <- df_filter[ , c(17:53, 54:58, 119:123)]
pretreatment <- attrition %>% select(1:37)
posttreatment <- attrition %>% select(38:47)
attrition <- cbind(pretreatment, df_filter$treatment, posttreatment)
colnames(attrition)[38] <- "Treatment_given"
attrition <- attrition[ , -c(11, 13, 15, 17)]
colnames(attrition)[2] <- "age"
colnames(attrition)[3] <- "educ"
colnames(attrition)[9] <- "income"
colnames(attrition)[31] <- "policy_trust"

attrition_plot <- 
  attritevis::plot_attrition(data = attrition,
                             freq = FALSE,
                             treatment_q = "Treatment_given",
                             outcome_q = "demo1_NPS_GROUP",
                             mycolors = c(Control = "#000066",
                                          Treatment = "#CC0033"),
                             total = FALSE,
                             tline = FALSE)
ggsave("attrition_plot.pdf", width = 8, height = 6)
